EIA API - Data Backfill

The goal of this doc is to execute an initial data pull of the hourly demand for California balancing authority subregion (CISO). This includes the following four independent system operators:

The data backfill process includes the following steps:

Load Libraries

Code
import eia_api as api
import eia_data 
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as px
Code
raw_json = open("../metadata/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]
Code
facets_template = {
  "parent" : None,
  "subba" : None
}

start = datetime.datetime(2018, 7, 1, 8)
end = datetime.datetime(2024, 2, 18, 1)

offset = 2250

eia_api_key = os.getenv('EIA_API_KEY')

meta_path = "../metadata/ciso_log_py.csv"
data_path = "../csv/ciso_grid_py.csv"
Code
metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
print(metadata.meta.keys())
print(metadata.meta["startPeriod"])
print(metadata.meta["endPeriod"])
dict_keys(['id', 'name', 'description', 'frequency', 'facets', 'data', 'startPeriod', 'endPeriod', 'defaultDateFormat', 'defaultFrequency'])
2018-06-19T05
2024-02-24T08
Code
for i in series.index:
  facets = facets_template
  facets["parent"] = series.at[i, "parent_id"]
  facets["subba"] = series.at[i, "subba_id"]
  print(facets)
  temp = api.eia_backfill(api_key = eia_api_key, 
        api_path = api_path+ "data", 
        facets = facets, 
        start = start,
        end = end,
        offset = offset) 
  ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
  ts_obj  = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
  ts_obj.drop("period", axis = 1, inplace= True)
  ts_obj = ts_obj.rename(columns= {"index": "period"})

  meta_temp = eia_data.create_metadata(data = ts_obj, start = start, end = end, type = "backfill")
  meta_temp["index"] = 1
  meta_df = pd.DataFrame([meta_temp])

  if i == series.index.start:
    data = ts_obj
    meta = meta_df
  else:
    data = data._append(ts_obj)
    meta = meta._append(meta_df)
{'parent': 'CISO', 'subba': 'PGAE'}
{'parent': 'CISO', 'subba': 'SCE'}
{'parent': 'CISO', 'subba': 'SDGE'}
{'parent': 'CISO', 'subba': 'VEA'}
Code
print(meta)
# The initial pull has some missing values

data.head()
   index parent subba                             time               start  \
0      1   CISO  PGAE 2024-02-24 19:41:53.318830+00:00 2018-07-01 08:00:00   
0      1   CISO   SCE 2024-02-24 19:42:08.307761+00:00 2018-07-01 08:00:00   
0      1   CISO  SDGE 2024-02-24 19:42:25.350882+00:00 2018-07-01 08:00:00   
0      1   CISO   VEA 2024-02-24 19:42:40.175097+00:00 2018-07-01 08:00:00   

                  end           start_act             end_act  start_match  \
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   

   end_match  n_obs  na      type  update  success  \
0       True  49386  98  backfill   False    False   
0       True  49386  98  backfill   False    False   
0       True  49386  98  backfill   False    False   
0       True  49386  98  backfill   False    False   

                      comments  
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   
period subba subba-name parent parent-name value value-units
0 2018-07-01 08:00:00 PGAE Pacific Gas and Electric CISO California Independent System Operator 12522.0 megawatthours
1 2018-07-01 09:00:00 PGAE Pacific Gas and Electric CISO California Independent System Operator 11745.0 megawatthours
2 2018-07-01 10:00:00 PGAE Pacific Gas and Electric CISO California Independent System Operator 11200.0 megawatthours
3 2018-07-01 11:00:00 PGAE Pacific Gas and Electric CISO California Independent System Operator 10822.0 megawatthours
4 2018-07-01 12:00:00 PGAE Pacific Gas and Electric CISO California Independent System Operator 10644.0 megawatthours
Code
# Save the data
d = eia_data.append_data(data_path = data_path, new_data = data, init = True, save = True)
# Save the metadata
meta["success"] = True
meta["update"] = True
m = eia_data.append_metadata(meta_path = "../metadata/ciso_log_py.csv", meta = meta, save = True, init = True)
print(m)
Initial data pull
Save the data to CSV file
   index parent subba                             time               start  \
0      1   CISO  PGAE 2024-02-24 19:41:53.318830+00:00 2018-07-01 08:00:00   
0      1   CISO   SCE 2024-02-24 19:42:08.307761+00:00 2018-07-01 08:00:00   
0      1   CISO  SDGE 2024-02-24 19:42:25.350882+00:00 2018-07-01 08:00:00   
0      1   CISO   VEA 2024-02-24 19:42:40.175097+00:00 2018-07-01 08:00:00   

                  end           start_act             end_act  start_match  \
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00         True   

   end_match  n_obs  na      type  update  success  \
0       True  49386  98  backfill    True     True   
0       True  49386  98  backfill    True     True   
0       True  49386  98  backfill    True     True   
0       True  49386  98  backfill    True     True   

                      comments  
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   
0  Missing values were found;   

Plot the Series

We will use Plotly to visualize the series:

Code
d = data.sort_values(by = ["subba", "period"])

p = px.line(d, x="period", y="value", color="subba")

p.show()